/*    This program merges LBD and innovation index to prepare data for regressions. 

*/


* Close any log left open by an earlier (possibly failed) run; otherwise the
* -log using- below stops with "log file already open". -capture- makes this
* a no-op when no log is open.
capture log close
log using "mk_regdata_mylog.log", replace

* Directory globals. Only inter and output are referenced further down in this
* file; the others are defined here but not used in the visible code.
global path "[redacted]"
global programs "${path}programs/"
global logs "${path}logs/"
global rawlbd "/projects/data/lbd/"
global imported "[redacted]"
global inter "${path}data/inter/"
global output "${path}data/output/"
global tables "${path}results/tables/"
global graphs "${path}results/graphs/"


************************
*** Prep Innov Index ***
************************

* Stack the cross-sectional innovation index once per year, 1977-1997, so it
* can be merged onto the fips-sic3-year panel.
*
* The cross-section is read once and every row is replicated with -expand-,
* instead of writing one scratch file per year, appending them and deleting
* them afterwards. The saved file holds the same observations and variables;
* only the row order differs (rows are grouped by source row, not by year).
local firstyr 1977
local lastyr  1997

use ${inter}indinnovindex_xsection_fipssic3.dta, clear

* Row id so that the copies of each source row can be numbered without
* assuming any particular key is unique in the cross-section.
tempvar rowid
gen long `rowid' = _n

* One copy of every row per year in the range (21 copies for 1977-1997).
expand `=`lastyr' - `firstyr' + 1'
bysort `rowid': gen year = `firstyr' - 1 + _n

* Drop the helper explicitly: temporary variables live until the do-file ends
* and would otherwise be written into the saved dataset.
drop `rowid'

save ${inter}innovindex_fipssic3_1977_1997.dta, replace

* Collapse the stacked index (still in memory) to the distinct combinations of
* county and the two university indicators.
contract fips univcont univsur
drop _freq

* Diagnostic: dup counts the surplus rows per county, i.e. it is nonzero when a
* county shows up with more than one (univcont, univsur) combination.
* Note that dup is never dropped, so it is stored in univfipslist.dta.
duplicates tag fips, generate(dup)
tabulate dup, missing

* Keep a single row per county: within fips, order by descending univcont and
* then descending univsur, and retain the first row.
gsort +fips -univcont -univsur
by fips: keep if _n == 1

save ${inter}univfipslist.dta, replace

************************
*** END Innov Index ***
************************


*****************************************************************
*** LBD - import, organize, get ready to merge in innov index ***
*****************************************************************

* Load the LBD fips-sic3-year panel and attach the stacked innovation index.
use ${inter}lbd_1977_1997_fips_sic3.dta, clear

merge 1:1 fips sic3 year using ${inter}innovindex_fipssic3_1977_1997.dta
* Keep the match indicator under its own name so later merges can create _merge.
rename _merge merge_innovindex

* Cells that exist only in the innovation index (merge_innovindex==2) have no
* LBD record; set their LBD outcome variables to zero instead of missing.
unab eslist: es*
local outcomes emp pay entemp muentemp suentemp entrants `eslist' ppw avgestsize entrate numestabs survincemp97 dyincemp97 survincemp dyincemp cyentrant cyexit
foreach v of local outcomes {
   replace `v' = 0 if `v' == . & merge_innovindex == 2
}

* Persist the merged data. The balancing section that follows starts by
* loading temp_prebalance.dta with -clear-; without this save the merge above
* is thrown away and a stale (or missing) file is read instead.
* NOTE(review): confirm that no other program is meant to produce
* temp_prebalance.dta before relying on this save.
save ${inter}temp_prebalance.dta, replace

*********************************
*** END LBD-Innov Index Merge ***
*********************************


*************************
*** BALANCE THE PANEL ***
*************************


* Build the full fips-sic3-year grid: every fips-sic3 pair observed in the
* pre-balance data, repeated for each year 1977-1997.

* Distinct fips-sic3 pairs (loaded and contracted once; the pair list does not
* depend on the year).
use ${inter}temp_prebalance.dta, clear
contract fips sic3
drop _freq

* Write one list file per year. These files are left on disk.
gen year = .
forvalues yr = 1977/1997 {
   replace year = `yr'
   save ${inter}fips_sic_`yr'_list.dta, replace
}

* The 1997 list is still in memory; stack the other years underneath it.
forvalues yr = 1977/1996 {
   append using ${inter}fips_sic_`yr'_list.dta
}
sort fips sic3 year
save ${inter}fips_sic_year_list.dta, replace

* Merge the grid onto the data. Cells that come only from the grid (_merge==2)
* were absent from the data, so their outcome variables are set to zero.
use ${inter}temp_prebalance.dta, clear
merge 1:1 fips sic3 year using ${inter}fips_sic_year_list.dta
unab eslist: es*
local outcomes emp pay entemp muentemp suentemp entrants `eslist' ppw avgestsize entrate numestabs survincemp97 dyincemp97 survincemp dyincemp cyentrant cyexit
foreach v of local outcomes {
   replace `v' = 0 if `v' == . & _merge == 2
}
drop _merge



* For each index year and each measure (cmfg, cuse):
*   - rename <measure>_freq<year> to <measure><year>
*   - tabulate where the measure is missing
*   - dr<measure><year>   : 1 if the measure was missing, 0 otherwise
*   - altz<measure><year> : z-score computed BEFORE missings are zero-filled
*   - z<measure><year>    : z-score computed AFTER missings are set to zero
foreach yr of numlist 1975 1980 1985 1990 {
   foreach stub in cmfg cuse {
      local v `stub'`yr'

      rename `stub'_freq`yr' `v'

      * Diagnostics on the missing cells.
      tabulate year if `v' == .
      tabulate univcont if `v' == ., missing
      tabulate univsur if `v' == ., missing

      generate dr`v' = (`v' == .)

      * Must run before the zero fill below.
      egen altz`v' = std(`v'), mean(0) std(1)

      replace `v' = 0 if `v' == .

      * Hand-rolled z-score over the zero-filled variable.
      egen mean`v' = mean(`v')
      egen sd`v' = sd(`v')
      generate z`v' = (`v' - mean`v') / sd`v'
   }
   * Removes the helper columns; note the wildcards match ANY variable whose
   * name starts with mean or sd.
   drop mean* sd*
}

* Bring in the one-row-per-county university indicators. With -update replace-
* nonmissing values from the list overwrite same-named variables in memory.
* No _merge variable is kept.
merge m:1 fips using ${inter}univfipslist.dta, update replace nogenerate

* Counties still without an indicator are coded as zero.
foreach u in univcont univsur {
   replace `u' = 0 if `u' == .
}


*************************
*** END BALANCING     ***
*************************

****************************************
*** MERGE County-Level External Data ***
****************************************

* Attach the 1980 county-level file. Counties that appear only in that file
* are not kept, and no _merge variable is left behind.
merge m:1 fips using ${inter}popdens_bashare_fips_1980.dta, keep(master match) nogenerate


****************************************
*** END County-Level External Data   ***
****************************************


*********************************
*** Calc Vars for Regressions ***
*********************************

* County employment in 1977: total emp over the county's 1977 rows, then
* spread to every row of that county.
tempvar emp77
egen `emp77' = total(emp) if year == 1977, by(fips)
egen cntyemp1977 = max(`emp77'), by(fips)
* Drop the helper explicitly so it cannot end up in a saved dataset.
drop `emp77'

* Dummy for the period after Bayh-Dole: 0 through 1980, 1 from 1981 onwards,
* missing when year is missing.
gen abd = (year > 1980) if year != .

* Discard every variable whose name contains nscmfg or nscuse.
drop *nscmfg* *nscuse*

* Post-period dummy interacted with each standardized measure.
foreach yr of numlist 1975 1980 1985 1990 {
   foreach stub in cmfg cuse {
      generate xabdz`stub'`yr' = abd * z`stub'`yr'
   }
}

* Post-period dummy interacted with the unstandardized 1980 cuse measure.
generate xabdcuse1980 = abd * cuse1980

* Post-period dummy interacted with the university indicators.
generate xabdunivcont = abd * univcont
generate xabdunivsur = abd * univsur

* Three transforms of every outcome variable x:
*   l<x>  = ln(x)                    (missing where x is not positive)
*   l1<x> = ln(1 + x)
*   l2<x> = ln(x + (x^2 + 1)^0.5)    (inverse hyperbolic sine, written out)
unab eslist: es*
local outcomes emp pay ppw entemp muentemp suentemp entrants cyentrant cyexit `eslist' avgestsize entrate numestabs
foreach x of local outcomes {
   generate l`x' = ln(`x')
   generate l1`x' = ln(1 + `x')
   generate l2`x' = ln(`x' + (`x'^2 + 1)^0.5)
}

* Rebuild the numeric identifiers from scratch.
drop numsic numfips

* numfip indexes county-year cells (one value per fips-year combination); it
* is not a numeric version of fips, so it is labelled accordingly. The numeric
* county code itself is numfips, created below.
egen numfip = group(fips year)
label var numfip "county-year group id (fips x year)"

* Numeric industry code. -force- turns non-numeric sic3 values into missing,
* and those rows are then removed.
destring sic3, gen(numsic3) force
drop if numsic3==.

* Numeric county code. No -force- here, so a non-numeric fips makes destring
* refuse the conversion.
destring fips, gen(numfips)

* Side dataset for graphs: the standardized 1980 cuse measure interacted with
* a full set of year indicators (1977-1997), in both signs. Built inside
* preserve/restore so that the extra columns do not stay in the main data.
preserve

forvalues yr = 1977/1997 {
   * zcuse1980 in the given year, zero in all other years.
   generate xzcuse1980I`yr' = cond(year == `yr', zcuse1980, 0)
   * Same interaction with the sign flipped.
   generate xzncuse1980I`yr' = cond(year == `yr', -zcuse1980, 0)
}

compress
save ${output}lbd_innovindex_forgraphs.dta, replace

restore


* Linear time trend centered on 1980, and its interactions with the
* post-period dummy and the standardized 1980 cuse measure.
generate trend = year - 1980
generate xtrendabd = trend * abd
generate xtrendzcuse1980 = trend * zcuse1980
generate xxtrendabdzcuse1980 = trend * zcuse1980 * abd


* Write the final regression dataset and close the log.
compress
save ${output}lbd_innovindex.dta, replace

log close

